{
  "cells": [
    {
      "cell_type": "markdown",
      "id": "5e0768d5",
      "metadata": {
        "id": "5e0768d5"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/jerryjliu/llama_index/blob/main/docs/examples/query_transformations/SimpleIndexDemo-multistep.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "id": "9c48213d-6e6a-4c10-838a-2a7c710c3a05",
      "metadata": {
        "id": "9c48213d-6e6a-4c10-838a-2a7c710c3a05"
      },
      "source": [
        "# Multi-Step Query Engine\n",
        "\n",
        "We have a multi-step query engine that's able to decompose a complex query into sequential subquestions. This\n",
        "guide walks you through how to set it up!"
      ]
    },
    {
      "cell_type": "markdown",
      "id": "cc77f986",
      "metadata": {
        "id": "cc77f986"
      },
      "source": [
        "If you're opening this Notebook on colab, you will probably need to install LlamaIndex 🦙."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "20a71098",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "20a71098",
        "outputId": "e001a673-d277-4839-bdec-d3433f035956"
      },
      "outputs": [],
      "source": [
        "%pip install llama-index-llms-openai\n",
        "%pip install llama-index-agent-openai\n",
        "!pip install llama-index"
      ]
    },
    {
      "cell_type": "markdown",
      "id": "c5885d63",
      "metadata": {
        "id": "c5885d63"
      },
      "source": [
        "#### Download Data"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "ee4ace96",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "ee4ace96",
        "outputId": "0963ba23-3afc-419f-f148-f22630d18cbf"
      },
      "outputs": [],
      "source": [
        "!mkdir -p 'data/paul_graham/'\n",
        "!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'"
      ]
    },
    {
      "cell_type": "markdown",
      "id": "50d3b817-b70e-4667-be4f-d3a0fe4bd119",
      "metadata": {
        "id": "50d3b817-b70e-4667-be4f-d3a0fe4bd119"
      },
      "source": [
        "#### Load documents, build the VectorStoreIndex"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "65ebfca2",
      "metadata": {
        "id": "65ebfca2"
      },
      "outputs": [],
      "source": [
        "import os\n",
        "\n",
        "os.environ[\"OPENAI_API_KEY\"] = \"sk-...\""
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "690a6918-7c75-4f95-9ccc-d2c4a1fe00d7",
      "metadata": {
        "id": "690a6918-7c75-4f95-9ccc-d2c4a1fe00d7"
      },
      "outputs": [],
      "source": [
        "from llama_index.core import VectorStoreIndex, SimpleDirectoryReader\n",
        "from llama_index.llms.openai import OpenAI\n",
        "from IPython.display import Markdown, display"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "c48da73f-aadb-480c-8db1-99c915b7cc1c",
      "metadata": {
        "id": "c48da73f-aadb-480c-8db1-99c915b7cc1c"
      },
      "outputs": [],
      "source": [
        "# LLM (gpt-3.5)\n",
        "gpt35 = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
        "\n",
        "# LLM (gpt-4)\n",
        "gpt4 = OpenAI(temperature=0, model=\"gpt-4\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "03d1691e-544b-454f-825b-5ee12f7faa8a",
      "metadata": {
        "id": "03d1691e-544b-454f-825b-5ee12f7faa8a"
      },
      "outputs": [],
      "source": [
        "# load documents\n",
        "documents = SimpleDirectoryReader(\"./data/paul_graham/\").load_data()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "ad144ee7-96da-4dd6-be00-fd6cf0c78e58",
      "metadata": {
        "id": "ad144ee7-96da-4dd6-be00-fd6cf0c78e58"
      },
      "outputs": [],
      "source": [
        "index = VectorStoreIndex.from_documents(documents)"
      ]
    },
    {
      "cell_type": "markdown",
      "id": "b6caf93b-6345-4c65-a346-a95b0f1746c4",
      "metadata": {
        "id": "b6caf93b-6345-4c65-a346-a95b0f1746c4"
      },
      "source": [
        "#### Query Index"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "95d989ba-0c1d-43b6-a1d3-0ea7135f43a6",
      "metadata": {
        "id": "95d989ba-0c1d-43b6-a1d3-0ea7135f43a6"
      },
      "outputs": [],
      "source": [
        "from llama_index.core.indices.query.query_transform.base import (\n",
        "    StepDecomposeQueryTransform,\n",
        ")\n",
        "\n",
        "# gpt-4\n",
        "step_decompose_transform = StepDecomposeQueryTransform(llm=gpt4, verbose=True)\n",
        "\n",
        "# gpt-3\n",
        "step_decompose_transform_gpt3 = StepDecomposeQueryTransform(\n",
        "    llm=gpt35, verbose=True\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "2a124db0-e2d7-4566-bcec-1d41cf669ff4",
      "metadata": {
        "id": "2a124db0-e2d7-4566-bcec-1d41cf669ff4"
      },
      "outputs": [],
      "source": [
        "index_summary = \"Used to answer questions about the author\""
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "85466fdf-93f3-4cb1-a5f9-0056a8245a6f",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "85466fdf-93f3-4cb1-a5f9-0056a8245a6f",
        "outputId": "1745877d-95f3-4266-a8e0-118ae21b09eb"
      },
      "outputs": [],
      "source": [
        "# set Logging to DEBUG for more detailed outputs\n",
        "from llama_index.core.query_engine import MultiStepQueryEngine\n",
        "\n",
        "query_engine = index.as_query_engine(llm=gpt4)\n",
        "query_engine = MultiStepQueryEngine(\n",
        "    query_engine=query_engine,\n",
        "    query_transform=step_decompose_transform,\n",
        "    index_summary=index_summary,\n",
        ")\n",
        "response_gpt4 = query_engine.query(\n",
        "    \"Who was in the first batch of the accelerator program the author\"\n",
        "    \" started?\",\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "7MeeUrf1xQcl",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "7MeeUrf1xQcl",
        "outputId": "98ba7f16-ddf7-4aa2-d7f1-58f14e3e1783"
      },
      "outputs": [],
      "source": [
        "sub_qa = response_gpt4.metadata[\"sub_qa\"]\n",
        "tuples = [(t[0], t[1].response) for t in sub_qa]\n",
        "print(tuples)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "sA96XR4qxtYh",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "sA96XR4qxtYh",
        "outputId": "1763d5cf-f53e-4dfc-e761-ac392b7bcb9e"
      },
      "outputs": [],
      "source": [
        "latest_tuple = tuples[-1]\n",
        "latest_query = latest_tuple[0]\n",
        "print(latest_query)"
      ]
    },
    {
      "cell_type": "markdown",
      "id": "iRiYf_rJy5Vb",
      "metadata": {
        "id": "iRiYf_rJy5Vb"
      },
      "source": [
        "New Technique"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "X2-aHSfByGyw",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "X2-aHSfByGyw",
        "outputId": "0b597cfc-7fe3-4270-8f44-1a36bf2fa527"
      },
      "outputs": [],
      "source": [
        "%load_ext autoreload\n",
        "%autoreload 2"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "NCsUXEuqyI2S",
      "metadata": {
        "id": "NCsUXEuqyI2S"
      },
      "outputs": [],
      "source": [
        "from llama_index.core import SimpleDirectoryReader, VectorStoreIndex\n",
        "from llama_index.core.response.pprint_utils import pprint_response\n",
        "from llama_index.llms.openai import OpenAI"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "MdBfe2CEyKl-",
      "metadata": {
        "id": "MdBfe2CEyKl-"
      },
      "outputs": [],
      "source": [
        "llm = OpenAI(temperature=0, model=\"gpt-4\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "ff_byYewyL5K",
      "metadata": {
        "id": "ff_byYewyL5K"
      },
      "outputs": [],
      "source": [
        "engine = index.as_query_engine(similarity_top_k=3, llm=llm)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "kwjUemE5yTVB",
      "metadata": {
        "id": "kwjUemE5yTVB"
      },
      "outputs": [],
      "source": [
        "from llama_index.core.tools import QueryEngineTool\n",
        "\n",
        "\n",
        "query_tool_pg = QueryEngineTool.from_defaults(\n",
        "    query_engine=engine,\n",
        "    name=\"files\",\n",
        "    description=(\n",
        "        f\"Paul graham essay\"\n",
        "    ),\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "EZi7FVbwydNQ",
      "metadata": {
        "id": "EZi7FVbwydNQ"
      },
      "outputs": [],
      "source": [
        "# define query plan tool\n",
        "from llama_index.core.tools import QueryPlanTool\n",
        "from llama_index.core import get_response_synthesizer\n",
        "\n",
        "response_synthesizer = get_response_synthesizer()\n",
        "query_plan_tool = QueryPlanTool.from_defaults(\n",
        "    query_engine_tools=[query_tool_pg],\n",
        "    response_synthesizer=response_synthesizer,\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "DV9tC4DQyexP",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "DV9tC4DQyexP",
        "outputId": "a72383bc-af3d-43da-eed1-9eaf746dd872"
      },
      "outputs": [],
      "source": [
        "query_plan_tool.metadata.to_openai_tool()  # to_openai_function() deprecated"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "MPEwUMaAygE-",
      "metadata": {
        "id": "MPEwUMaAygE-"
      },
      "outputs": [],
      "source": [
        "from llama_index.agent.openai import OpenAIAgent\n",
        "from llama_index.llms.openai import OpenAI\n",
        "\n",
        "\n",
        "agent = OpenAIAgent.from_tools(\n",
        "    [query_plan_tool],\n",
        "    max_function_calls=10,\n",
        "    llm=OpenAI(temperature=0, model=\"gpt-4-0613\"),\n",
        "    verbose=True,\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "HGEzQEupyh9a",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "HGEzQEupyh9a",
        "outputId": "00f83ff8-68b2-4c22-f683-1e2da834c26f"
      },
      "outputs": [],
      "source": [
        "response = agent.query(latest_query)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "-IOyxIjpymvH",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "-IOyxIjpymvH",
        "outputId": "ca25e7a5-8488-4469-c1a0-7f5a32a2ae53"
      },
      "outputs": [],
      "source": [
        "print(str(response))"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "display_name": "llama",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 5
}
